

* Session setup: start from an empty session, close any log that is still
* open, turn off output paging, and set the maximum matrix size to 800.
clear all
capture log close
set more off
set matsize 800

*************************
*************************
***First Stage Results***
***All Florida***********
***Pollution Monitors****
***Jenni Heissel*********
***August 22, 2017*******
*************************
*************************

***This do-file revises the first stage: the effect of wind direction from
*highways on pollution.

* globals
* The three path globals are blank in this file. With output blank, the log
* and every table below are written relative to the filesystem root, so set
* them before running.
	global home 
	
global output 
global samples 

* Open the log for this run, closing any log that is still open first.
capture log close
log using "$output/first_stage_12418.log", replace

* Load the analysis file from the current working directory.
use "fst062018.dta"


tab hour


* Create the cluster variable: one id per site-day.
* Stored as double because the default float type holds integers exactly only
* up to 16,777,216; a larger date*100 + site value would be rounded and could
* merge distinct site-days into one cluster.
* NOTE(review): the id is unique only if EPA_closest is always below 100.
* Confirm, or switch to egen group(date EPA_closest).
gen double siteday = date*100 + EPA_closest

sum siteday EPA_closest date


***Treatment variable lists used by the first-stage tables below.
***Add circtreat once that variable is found in the tables.
*local treatment "downwind_20 windtreat circtreat"  
local treatment1 "  dwind_any_win1 dwintensity1h"  
local treatment2 "   dwind_any_win4 dwintensity4h"

* Distance-bin dummies on mi_to_nid1 in steps of 0.05, from 0 to 0.4.
* Each dummy is missing when mi_to_nid1 is missing. The last bin includes its
* upper bound of 0.4; values above 0.4 are 0 in every bin.
gen mid1 = (mi_to_nid1>=0 & mi_to_nid1<0.05) if mi_to_nid1~=.
gen mid2 = (mi_to_nid1>=0.05 & mi_to_nid1<0.1) if mi_to_nid1~=.
gen mid3 = (mi_to_nid1>=0.1 & mi_to_nid1<0.15) if mi_to_nid1~=.
gen mid4 = (mi_to_nid1>=0.15 & mi_to_nid1<0.2) if mi_to_nid1~=.
gen mid5 = (mi_to_nid1>=0.2 & mi_to_nid1<0.25) if mi_to_nid1~=.
* Fixed: this bin was a copy of mid7 (0.3 to 0.35), which left 0.25 to 0.3
* without a dummy and put two identical regressors into every mid* model.
gen mid6 = (mi_to_nid1>=0.25 & mi_to_nid1<0.3) if mi_to_nid1~=.
gen mid7 = (mi_to_nid1>=0.3 & mi_to_nid1<0.35) if mi_to_nid1~=.
gen mid8 = (mi_to_nid1>=0.35 & mi_to_nid1<=0.4) if mi_to_nid1~=.
	


* Standardize each pollutant to a z-score (value minus sample mean, divided by
* sample sd) and build the composite outcome polimp from the z-scores.

foreach pol of varlist epaCO epaNO2 epaPM10 {
	summarize `pol'
	generate `pol'adj = (`pol' - r(mean))/r(sd)
}

* polimp is the row mean of the standardized pollutants; rowmean averages over
* whichever of the three are non-missing in that observation.
egen polimp = rowmean(epaCOadj epaNO2adj epaPM10adj)

* Natural log of each raw pollutant, so coefficients read as proportional
* changes.
foreach pol of varlist epaPM25 epaNO2 epaPM10 epaNO epaCO epaNOx {
	generate log`pol' = ln(`pol')
}

* Outcome lists used by the monitor counts and first-stage tables below.
local pollute1 logepaPM10 logepaNO2
local pollute2 logepaCO polimp

	

* For each outcome and each of two treatment variables, tabulate site by
* distance over the observations where both are non-missing, to show how many
* monitors contribute.
foreach outcome in `pollute1' `pollute2' {
	foreach tvar in dwintensity4h dwind_any_win1 {
		display "`outcome'"
		display "`tvar'"
		tabulate EPAsite mi_to_nid1 if `outcome' != . & `tvar' != .
	}
}
***********************************************************************
* First-stage tables, all hours, no donut: 3 models per outcome.
*
* One nested loop replaces four copy-pasted blocks. It runs in the same order
* as before and writes the same files:
*   treatment1 with pollute1  ->  pol1<treat>_071518_1.rtf   (1/10th mile)
*   treatment1 with pollute2  ->  pol2<treat>_071518_1.rtf
*   treatment2 with pollute1  ->  pol1<treat>_071518_4.rtf   (4/10ths mile)
*   treatment2 with pollute2  ->  pol2<treat>_071518_4.rtf
* It relies on the locals treatment1, treatment2, pollute1 and pollute2 and on
* the variables siteday and mid* created earlier in this file.
***********************************************************************
eststo clear
forvalues t = 1/2 {
	* file-name suffix: 1 for the first treatment list, 4 for the second
	local radius : word `t' of 1 4

	forvalues p = 1/2 {
		foreach treat in `treatment`t'' {
			display "****************Use `treat' as treatment variable****************"

			foreach out in `pollute`p'' {
				display "****************`out' first stage****************"

				* model 1: treatment plus distance bins
				eststo: reg `out' `treat'  mid*, vce(cluster siteday)

				* model 2: distance bins plus month FE (for seasonal variation)
				eststo: reghdfe `out' `treat' mid*, absorb(month) vce(cluster siteday)

				* model 3: month and site FE, no distance bins
				* (presumably because distance is fixed within site; confirm)
				eststo: reghdfe `out' `treat', absorb(month EPA_closest) vce(cluster siteday)

				* Mean of the outcome over all non-missing observations, not the
				* estimation sample.
				* NOTE(review): estadd applies to the active estimates, so only
				* model 3 of each outcome carries this scalar in the table.
				* Kept as in the original; confirm that is intended.
				sum `out'
				local m = r(mean)
				estadd scalar pollute=`m'
			}

			* one table per treatment variable, holding all models for this outcome list
			esttab using "$output/pol`p'`treat'_071518_`radius'.rtf", ///
				compress replace b(4) se(4) onecell label nonumbers  ///
				title (" first stage All hours") scalars(pollute) keep(`treat')	star(* .1 ** .05 *** .01)

			eststo clear
		}
	}
}


***************now collapse to site-level averages
* Collapse to one observation per site (EPA_closest): means of the outcomes,
* distance bins and downwind indicators, with cellnum counting the underlying
* observations. The collapse is by site only, so each mean covers every
* observation for that site, not a single year.

generate cellct = 1

summarize polimp logepaNO2 logepaCO

collapse (mean) polimp logepaNO2 logepaCO mid* dwind_any_win4 dwind_any_win1 ///
	(rawsum) cellnum=cellct [aweight=cellct], by(EPA_closest)

summarize polimp logepaNO2 logepaCO [aweight=cellnum]

* Site-level regressions, weighted by observations per site, with robust SEs.
* The composite outcome is run with distance bins, the logged outcomes without.
regress polimp dwind_any_win4 mid* [aweight=cellnum], vce(robust)

regress polimp dwind_any_win1 mid* [aweight=cellnum], vce(robust)

regress logepaNO2 dwind_any_win4 [aweight=cellnum], vce(robust)

regress logepaCO dwind_any_win4 [aweight=cellnum], vce(robust)

regress logepaCO dwind_any_win1 [aweight=cellnum], vce(robust)

regress logepaNO2 dwind_any_win1 [aweight=cellnum], vce(robust)

	log close




